# Read the work-report corpus from a GB18030-encoded CSV. Text columns are
# kept as character (not factor) so that nchar() works on every R version.
data <- read.csv(
  "workreports.csv",
  header = TRUE,
  fileEncoding = "GB18030",
  stringsAsFactors = FALSE
)

# Keep only reports whose text has at least 50 characters, together with the
# columns used downstream. which() drops rows whose alltext is NA; a bare
# logical index would turn each of them into an all-NA row.
data1 <- data[
  which(nchar(data$alltext) >= 50),
  c("alltext", "year", "code", "where", "title")
]

library("stm")
library("jiebaR")
# Segment every report with a jiebaR worker ("mix" mode, stop words read from
# stopwords.txt) and store the tokens as one space-separated string per report.
comments_wk <- worker(type = "mix", stop_word = "stopwords.txt")
tem <- lapply(data1$alltext, segment, jiebar = comments_wk)

# vapply() always returns a character vector, also for zero documents, whereas
# sapply() would return an empty list in that case.
a <- vapply(tem, function(x) paste(x, collapse = " "), character(1))

# Assign the whole column at once; a 1:nrow(data1) loop would iterate over
# c(1, 0) on an empty data frame.
data1$text <- a

# Prepare the full corpus for estimation: convert the segmented text into stm
# input, carrying data1 along as document metadata.
# NOTE(review): textProcessor() runs with its default options here; confirm
# those defaults (language, stemming, word-length filter) are appropriate for
# pre-segmented Chinese text.
processed <- textProcessor(documents = data1$text, metadata = data1)
out <- prepDocuments(
  documents = processed$documents,
  vocab = processed$vocab,
  meta = processed$meta
)

# Convenience handles on the prepared corpus
docs <- out[["documents"]]
vocab <- out[["vocab"]]
meta <- out[["meta"]]

# Use searchK() to compare candidate numbers of topics; year enters as a
# smooth prevalence covariate and each fit is capped at 15 EM iterations.
kSearch <- searchK(
  documents = out$documents,
  vocab = out$vocab,
  K = c(10, 30, 50, 70, 90, 100),
  prevalence = ~ s(year),
  init.type = "LDA",
  max.em.its = 15,
  data = meta
)
plot(kSearch)
# Chosen value: K = 50

# Restrict the corpus to reports from 2013 onwards
data2 <- data1[data1$year >= 2013, ]

# Rebuild the stm inputs for the restricted corpus
processed1 <- textProcessor(documents = data2$text, metadata = data2)
out1 <- prepDocuments(
  documents = processed1$documents,
  vocab = processed1$vocab,
  meta = processed1$meta
)

# Convenience handles on the restricted corpus
docs1 <- out1[["documents"]]
vocab1 <- out1[["vocab"]]
meta1 <- out1[["meta"]]

# Fit the structural topic model on the restricted corpus: 50 topics, year as
# a smooth prevalence covariate, LDA initialisation, fixed seed.
stm2 <- stm(
  documents = docs1,
  vocab = vocab1,
  K = 50,
  prevalence = ~ s(year),
  max.em.its = 80,
  data = out1$meta,
  init.type = "LDA",
  seed = 8458159
)

# Trace the bound recorded in stm2$convergence across iterations
plot(
  stm2$convergence$bound,
  type = "l",
  ylab = "Approximate Objective",
  main = "Convergence"
)


# Visualize: regress the proportions of topics 1-50 on a smooth function of
# year. The metadata argument is spelled out in full instead of relying on
# partial matching of `meta`.
prepstm2 <- estimateEffect(
  1:50 ~ s(year),
  stm2,
  metadata = out1$meta,
  uncertainty = "Global"
)

# Summary plot of the fitted model
plot(stm2)

# Explicit print() so the topic labels are also shown when the script is run
# through source(), which does not auto-print top-level values.
print(labelTopics(stm2))

# Save one prevalence-over-year plot per estimated topic as Topic_<i>.jpg.
# The topics come from the estimateEffect object, so the loop always matches
# what was actually estimated.
for (i in prepstm2$topics) {
  # jpeg() makes the file contents match the .jpg extension (png() would
  # write PNG data under a .jpg name); quality = 100 keeps lines crisp.
  jpeg(
    filename = paste0("Topic", "_", i, ".jpg"),
    width = 2400,
    height = 1800,
    res = 200,
    quality = 100
  )
  # Close the device even if plotting fails, so no graphics device is leaked
  # and later plots do not end up in a stale file.
  tryCatch(
    {
      plot(
        prepstm2,
        covariate = "year",
        method = "continuous",
        topics = i,
        model = stm2,
        printlegend = FALSE,
        xaxt = "n",
        xlab = "Year"
      )
      # The default x axis is suppressed above; draw one tick per year.
      axis(1, at = 2013:2018, labels = as.character(2013:2018))
    },
    finally = dev.off()
  )
}

# Figure 5 contains Topic 31 (Poverty Reduction), Topic 17 (Environmental Protection), Topic 20 (Loyalty to Xi) and Topic 6 (Party Discipline and Law).
# Figure 6 contains Topic 26 (Industrial Zone), Topic 22 (Macroeconomic Policies), Topic 10 (Urbanization) and Topic 33 (Economic Growth).
# Figure A5 contains the top words for these topics.